* START_3273_CONCRETE_2002_07.SAS -- produces CONCRETE datasets;


%include "ASMimplibs.sas";


*OPTIONS obs=5000;run;


* PART 1: RAW DATA FOR PRODUCERS OF PRODUCT;
/*---------------------------------------------------------------------
  %chk(y, p) -- PART 1: raw product-trailer data for producers of the
                target product.

  y : census year; used to build the dataset names cmf.cmf&y.prod,
      chk&y, est&y, xx&y and fhs.conc&y.
  p : quoted NAICSPC code (or comma-separated list of quoted codes)
      identifying the product of interest; used inside IN (&p).

  Output: fhs.conc&y -- every product-trailer record (non-missing pv,
  non-blank NAICSPC) of each establishment that reports at least one
  record with NAICSPC in (&p), plus three flags:
     cflag = 1 when the record itself is the target product
     pflag = 1 when pqs > 0
     dflag = 0 for codes starting with '9', the listed balancing /
             admin codes, or pv < 0; 1 otherwise

  NOTE(review): the MERGE below assumes cmf.cmf&y.prod is already
  sorted by survu_id -- confirm.
  NOTE(review): SUBSTR(NAICSPC,7,4) assumes NAICSPC is at least 10
  characters long -- confirm.
---------------------------------------------------------------------*/
%macro chk(y,p);

 /* Product-trailer records carrying the target product code. */
 data chk&y;
   set cmf.cmf&y.prod (keep = survu_id NAICSPC );
   if NAICSPC in (&p);
 run;

 title "&y -- # of product trailer records with NAICSPC=ready-mix concrete";
 proc freq data=chk&y;table NAICSPC;run;

 /* Collapse to one row per establishment. */
 proc sort data=chk&y nodupkey out=est&y;by survu_id;run;

 title "&y -- # of estabs that have ready-mix concrete in product trailer file";
 title2 "ignore the actual products in this case";
 proc freq data=est&y;table NAICSPC;run;

 /* Now get all of the product records for these establishments.
    The other products are used later in creating the PPSR measure,
    so records with codes starting with 9, balancing codes, admin
    records, or pv<0 are flagged (dflag=0) rather than dropped here. */
 data fhs.conc&y;
   merge est&y(in=one keep=survu_id)
         cmf.cmf&y.prod(keep=survu_id NAICSPC pqs pv);
   by survu_id;
   if one=1 and pv ne . and NAICSPC ne "";

   /* cflag: record is the target (concrete) product. */
   if NAICSPC in (&p) then cflag=1;else cflag=0;
   /* pflag: record has a positive physical quantity. */
   if pqs>0 then pflag=1;else pflag=0;

   NAICSPC1=substr(NAICSPC,1,1);
   NAICSPC7=substr(NAICSPC,7,4);
   NAICSPC8=substr(NAICSPC,1,8);

   /* dflag: 0 = non-legit product code or negative value, 1 = legit. */
   if NAICSPC1='9'
      or NAICSPC8 in ('77000000' , '00093000')
      or NAICSPC in ('0009998900' , '0009998000' )
      or NAICSPC7 in ('WYWW' , '0YWW' , '000-' )
      or pv<0
   then dflag=0;
   else dflag=1;
 run;

 /* Records flagged non-legit because pv<0. pv=. was already removed
    when fhs.conc&y was built; the WHERE below is kept as a guard
    because SAS treats a missing value as less than zero. */
 data xx&y;
   set fhs.conc&y;
   if dflag=0 and pv<0;
 run;

 title "&y -- Product trailer records with pv<0 (EXCLUDING PV = .)";
 title2 "For plants that produce ready-mix concrete";
 proc print data=xx&y;var survu_id NAICSPC pv;
   where pv ne .;
 run;

 title "&y -- # of product trailer records (and concrete vs not concrete)";
 title2 "&y -- for r-m concrete-producing plants";
 title3 "cflag=1: r-m concrete;  cflag=0: not r-m concrete";
 proc freq data=fhs.conc&y;table cflag;run;

 title "&y -- # of legit products vs. non-legit or pv<0";
 title2 "dflag=1: legit product codes";
 proc freq data=fhs.conc&y;table dflag;run;

 /* Fixed: this step previously read fhs.carbon&y, which this program
    never creates; pflag and dflag live on fhs.conc&y. */
 title "&y -- # of physical products";
 title2 "pflag=1: product trailer record has physical product data (pqs>0)";
 proc freq data=fhs.conc&y;table pflag pflag*dflag;run;

%mend;

/* Run PART 1 for each year. %STR() protects the quoted product code
   so it is passed to the macro as a single argument. */
%chk(2002,%STR('3273200100'));  /* Ready-mix concrete */ 
%chk(2007,%STR('3273200100')); 


* PART 2: CLEANED UP DATA;
* Exclusion based on low ppsr;
* ppsr calculated only on legit codes;
* and using total product for numerator;
* (not an issue for concrete);

/*---------------------------------------------------------------------
  %chk2(y, p) -- PART 2: build price and the product specialization
                 ratio (PPSR) per establishment and write the cleaned
                 dataset.

  y : census year; used to build dataset names (reads fhs.conc&y,
      writes fhs.concf&y).
  p : quoted NAICSPC code (or comma-separated list of quoted codes)
      for the product of interest; used inside IN (&p).

  Output: fhs.concf&y with survu_id, price, pqs, ppsr1, phyflag.
     ppsr1   = pv of the target product / total pv over legit codes
     price   = pv / pqs of the target product (only when pqs > 0)
     phyflag = 1 when pqs > 0, else 0

  Establishments whose only target-product records were flagged
  dflag=0 still appear in the output, with missing pv-based measures.
---------------------------------------------------------------------*/
%macro chk2(y,p);

  /* Creating PPSR */

  /* 1) Pick only legitimate codes. */
  data good&y;
    set fhs.conc&y;
    if dflag=1;
  run;
  proc sort data=good&y;by SURVU_ID;run;

  /* 2) Sum up all of the products that the estab has. */
  proc summary data=good&y nway;
    by SURVU_ID;
    var pv;
    output out=totpv&y sum=totpv;
  run;

  /* 3) Create single product of interest (if needed).
        cc&y inherits the SURVU_ID sort order from good&y. */
  data cc&y;
    set good&y;
    if NAICSPC in (&p);
  run;

  proc summary data=cc&y nway;
    by SURVU_ID;
    var pv pqs;
    output out=prod&y sum=;
  run;

  /* 4) Create the price and ppsr. */
  data chk1&y;
    merge prod&y totpv&y;
    by SURVU_ID;
    if totpv > 0 then ppsr1=pv/totpv;
    else ppsr1=.;
    label ppsr1="Product Specialization Ratio (version 1)";
    if ppsr1=1 then ttflag=1;else ttflag=0;
    label ttflag="TTFLAG=1 when there is only 1 product produced";
    if pqs>0 then price=pv/pqs;
    label price="Constructed Price=PV/PQS";
  run;

  title "&y -- Price properties";
  title2 "$ per cubic yard";
  proc univariate data=chk1&y;var price;run;

  /* 5) Properties of PPSR. */
  title "&y -- PPSR1 properties";
  proc univariate data=chk1&y;var ppsr1;run;

  proc freq data=chk1&y;table ttflag;run;

  /* Now just looking at PPSR for multi-product estabs. */
  data multi&y;
    set chk1&y;
    if ttflag=0;
  run;

  title2 "Just for multi-product estabs";
  proc univariate data=multi&y;var ppsr1;run;

  /* 6) Applying exclusion rule based on PPSR & PQS. */
  data big&y;
    set chk1&y;
    if ppsr1>0.50 then ppsrflag=1;else ppsrflag=0;
    /* Fixed: the old test (pqs=0 -> 0, else 1) marked a missing or
       negative pqs as having physical data. Use the same pqs>0 rule
       that defines price above. */
    if pqs>0 then phyflag=1;else phyflag=0;
    label phyflag="PHYFLAG=1 when has physical data";
  run;

  title "&y -- estabs with ppsr1>0.50 & physical data";
  proc freq data=big&y;table ppsrflag phyflag;run;

  /* Final dataset. The flags are reported above but no exclusion is
     applied here; the phyflag filter is intentionally left disabled. */
  data fhs.concf&y (keep=survu_id price pqs ppsr1 phyflag);
    set big&y;
    /* if phyflag=1; */
  run;
%mend;

/* Run PART 2 for each year, using the same product code as PART 1. */
%chk2(2002,%STR('3273200100')); 
%chk2(2007,%STR('3273200100')); 


* PART 3: PROPERTIES OF THE FINAL DATASET;

/*---------------------------------------------------------------------
  %chk3(y, i) -- PART 3: properties of the final dataset.

  y : census year; used to build dataset names (reads fhs.concf&y and
      cmf.cmf&y).
  i : industry code compared, as a quoted string, against NAICS_NEW.

  Reports (no permanent datasets are written):
    1) NAICS_NEW distribution of the establishments in fhs.concf&y.
    2) Coverage of industry &i: counts and tvs totals for our sample
       (ourflag=1) vs. the rest (ourflag=0), overall and for ar=0.

  NOTE(review): the MERGE steps assume cmf.cmf&y is sorted by
  survu_id -- confirm.
  NOTE(review): &i is wrapped in double quotes, so NAICS_NEW is
  presumably a character variable stored in the same form the callers
  pass -- confirm.
---------------------------------------------------------------------*/
%macro chk3(y,i);

  /* 1) Their industries. */
  proc sort data=fhs.concf&y out=concf&y;by survu_id;run;

  data ind&y;
    merge concf&y (in=one) cmf.cmf&y (keep=survu_id NAICS_NEW);
    by survu_id;
    if one;
  run;

  title "&y -- NAICS_NEW of final concrete dataset";
  proc freq data=ind&y;table NAICS_NEW;run;

  /* 2) Coverage of the industry.
        concf&y is already the sorted copy of fhs.concf&y from step 1,
        so it is only tagged here, not re-sorted. */
  data concf&y;
    set concf&y;
    ourflag=1;
  run;

  data tot&y;
    merge concf&y (keep=survu_id ourflag)
          cmf.cmf&y (keep=survu_id NAICS_NEW tvs ar tabbed);
    by survu_id;
  run;

  /* Merge check: without this title the table would print under the
     stale NAICS_NEW title from step 1. */
  title "&y -- ourflag after merge with CMF (before industry restriction)";
  proc freq data=tot&y;table ourflag;run;

  /* Restrict to the industry of interest; CMF-only rows get ourflag=0. */
  data tot&y;
    set tot&y;
    if ourflag=. then ourflag=0;
    if NAICS_NEW in ("&i");
  run;

  /* Fixed: the third table previously named tabbedr, which is not on
     tot&y; the variable kept from the CMF file is tabbed. */
  title "&y-- our sample vs total sample";
  proc freq data=tot&y;table ourflag ar*ourflag tabbed*ourflag; run;

  proc summary data=tot&y;
    var tvs;
    class ourflag;
    output out=sum&y sum=;
  run;

  title "&y -- total tvs";
  proc print data=sum&y;run;

  /* Same totals for the records with ar=0. */
  data nonar&y;
    set tot&y;
    if ar=0;
  run;

  proc summary data=nonar&y;
    var tvs;
    class ourflag;
    output out=sum2&y sum=;
  run;

  title "&y -- non-ar tvs";
  proc print data=sum2&y;run;
%mend;

/* Run PART 3 for each year. The second argument is compared, as a
   quoted string, against NAICS_NEW inside the macro.
   NOTE(review): the value passed has 8 digits -- confirm it matches
   how NAICS_NEW is stored in cmf.cmf2002 / cmf.cmf2007. */
%chk3(2002,32732000);
%chk3(2007,32732000);


